Ammonia (N)

Ammonia (N) Report

Code

if (!requireNamespace("librarian", quietly = TRUE)) {
  # If not installed, install the package
  install.packages("librarian")
}

librarian::shelf(
  glue,
  here,
  skimr,
  ggplot2
)

# Read the cleaned dataset from the project-root-relative path and pick up the
# parameter this report was rendered for.
data <- read.csv(file = here::here("data/df_cleaned.csv"))
parameter_name <- params$parameter_name

apply param bounds

# Drop rows of the selected parameter whose Value lies outside the range listed
# for it in parameter_bounds.csv (columns used: param, min, max).
#
# Reads:  data, parameter_name
# Writes: bounds, lower_bound, upper_bound, filter_condition, data
#
# filter_condition always ends up as a logical vector with one entry per
# original row of `data` (TRUE = row removed) and never contains NA.
bounds <- read.csv(
  here("parameter_bounds.csv"),
  stringsAsFactors = FALSE,
  strip.white = TRUE
)

# which() ignores NA entries in bounds$param, so a blank row in the bounds file
# cannot leak an NA bound into the comparison below.
bounds_rows <- which(bounds$param == parameter_name)
lower_bound <- bounds$min[bounds_rows]
upper_bound <- bounds$max[bounds_rows]

# Default: nothing is flagged. This also covers a parameter with no bounds.
filter_condition <- rep(FALSE, nrow(data))
bounds_applied <- FALSE

if (length(lower_bound) == 1 && length(upper_bound) == 1) {
  flagged <- data$Parameter == parameter_name &
    (data$Value < lower_bound | data$Value > upper_bound)

  # A mask of the wrong length means a required column is missing; filtering
  # with it would recycle or drop everything, so skip instead.
  if (length(flagged) == nrow(data)) {
    # A comparison against a missing Value, Parameter, or bound is NA; such a
    # row is not known to be out of range, so it is kept (NA -> FALSE).
    filter_condition <- flagged %in% TRUE
    bounds_applied <- TRUE
  }
} else if (length(bounds_rows) > 1) {
  warning(
    "parameter_bounds.csv has ", length(bounds_rows), " rows for '",
    parameter_name, "'; bounds not applied",
    call. = FALSE
  )
}

if (bounds_applied) {
  data <- data[!filter_condition, , drop = FALSE]
  rownames(data) <- NULL  # renumber so the result has no gaps in row names
  print(glue("{sum(filter_condition)} rows dropped as < {lower_bound} or > {upper_bound}"))
} else {
  print(glue("no rows removed"))
}

no rows removed

report rows dropped by param bounds

# Report how many rows the out-of-bounds mask flagged, together with the
# bounds that were used.
print(
  glue::glue(
    "{sum(filter_condition)} rows dropped as < {lower_bound} or > {upper_bound}"
  )
)

write cleaned DataFrame to a file

# Save the filtered data next to the input file, without a row-name column.
utils::write.csv(
  x = data,
  file = here::here("data/df_cleaned_02.csv"),
  row.names = FALSE
)

subset data to the selected parameter & skim

# Keep only the rows for the selected parameter, then print a skimr summary of
# every column in that subset.
subset_data <- subset(
  data,
  subset = Parameter == parameter_name
)
print(skimr::skim(subset_data))

── Data Summary ────────────────────────
                           Values     
Name                       subset_data
Number of rows             10130      
Number of columns          17         
_______________________               
Column type frequency:                
  character                4          
  numeric                  13         
________________________              
Group variables            None       

── Variable type: character ────────────────────────────────────────────────────
  skim_variable n_missing complete_rate min max empty n_unique whitespace
1 Source                0             1   3  11     0        7          0
2 Site                  0             1   1   8     0      776          0
3 Parameter             0             1  11  11     0        1          0
4 Units                 0             1   4   4     0        1          0

── Variable type: numeric ──────────────────────────────────────────────────────
   skim_variable     n_missing complete_rate        mean        sd           p0
 1 ...1                      0         1     689230.     3110.     683929      
 2 Latitude                  0         1         26.6       0.744      24.9    
 3 Longitude                 0         1        -80.5       0.535     -85.2    
 4 Month                     0         1          6.75      3.38        1      
 5 Day                       0         1         13.7       8.01        1      
 6 Year                      0         1       2023.        0.0358   2023      
 7 Value                     0         1          0.0679    0.169       0.00165
 8 Sample.Depth            149         0.985      1.42     14.9         0.014  
 9 Total.Depth           10130         0        NaN        NA          NA      
10 verbatimValue             0         1          0.0679    0.169       0.00165
11 VerbatimLatitude          0         1         26.6       0.744      24.9    
12 verbatimLongitude         0         1        -80.5       0.535     -85.2    
13 Value_orig                0         1          0.0679    0.169       0.00165
            p25       p50        p75      p100 hist   
 1 686478.      689388.   691951.    694525    "▇▇▆▇▇"
 2     25.9         26.7      27.2       30.8  "▅▇▂▁▁"
 3    -80.9        -80.3     -80.1      -80.0  "▁▁▁▂▇"
 4      4            7        10         12    "▇▅▆▆▇"
 5      7           13        19         31    "▇▇▆▅▃"
 6   2023         2023      2023       2024    "▇▁▁▁▁"
 7      0.00918      0.02      0.061      6.77 "▇▁▁▁▁"
 8      0.5          0.5       0.5     1463    "▇▁▁▁▁"
 9     NA           NA        NA         NA    " "    
10      0.00918      0.02      0.061      6.77 "▇▁▁▁▁"
11     25.9         26.7      27.2       30.8  "▅▇▂▁▁"
12    -80.9        -80.3     -80.1      -80.0  "▁▁▁▂▇"
13      0.00918      0.02      0.061      6.77 "▇▁▁▁▁"

create histogram of values for the selected parameter (params$parameter_name)

# Histogram (30 bins) of Value for the selected parameter's rows.
ggplot2::ggplot(data = subset_data, mapping = ggplot2::aes(x = Value)) +
  ggplot2::geom_histogram(bins = 30, fill = "blue", color = "black") +
  # Bin counts are drawn on a log10-scaled y-axis.
  ggplot2::scale_y_log10() +
  ggplot2::labs(
    title = paste("Histogram of Values for", params$parameter_name),
    x = "Value",
    y = "Log Frequency"
  ) +
  ggplot2::theme_minimal()